Load some packages:
library(tidyverse) # manipulate and visulaize data
theme_set(theme_light(base_size = 16)) # set theme for visualisation and font size
library(unmarked) # occupancy modelling
library(sf) # spatial datavizWe use data from Mapping and explaining wolf recolonization in France using dynamic occupancy models and opportunistic data by Louvrier et al. in Ecography available at https://onlinelibrary.wiley.com/doi/abs/10.1111/ecog.02874. Data were collected between 1994 and 2017 (23 years or primary occasion). Occasions are months December, January, February and March (4 visits or secondary occasions). A 100km\(^2\) grid was overlaid over France and detections and non-detections were collected. See paper for more details.
First detection/non-detection data:
y <- readRDS("data/wolf_louvrier.rds")
dim(y)## [1] 3450 92
There are > 3000 cells in row. In columns we have 23 years * 4 visits, arranged in visits within year format, that is visit 1 to visit 4 for year 1994 in the first four columns, then visit 1 to visit 4 for year 1995, and so on.
Now read in some covariates data. First the survey effort which is (approx) the number of observers per cell:
effort <- readRDS("data/effort_louvrier.rds")The effort covariate is a yearly site-level covariate, with values that do not vary between visits within a year, but that do vary between years:
dim(effort)## [1] 3450 23
We have some environmental covariates as well, all site-level covariates:
envcov <- readRDS("data/envcov_louvrier.rds")The 7 covariates are specifically in column: farmland cover, altitude, distance to closest barrier, road density, forest cover, high altitude, rock cover:
colnames(envcov)## [1] "p_agri" "p_alti" "p_dbarr" "p_road" "p_forest" "p_halt" "p_rock"
Last, we have two additional yearly site-level covariates which capture dispersal abilities of wolves, namely the number of occupied neighboring cells at short (10km) and long (150km) distance:
# short distance
SDAC <- readRDS("data/shortd_spatialautocorr.rds")
# long distance
LDAC <- readRDS("data/longd_spatialautocorr.rds")Site-level covariates first:
sites.covs <- data.frame(
forest = envcov[,"p_forest"], # forest cover
agr = envcov[,"p_agri"], # farmland cover
rock = envcov[,"p_rock"], # rock cover
halt = envcov[,"p_halt"], # high altitude
alt = envcov[,"p_alti"], # altitude
dbarr = envcov[,"p_dbarr"], # distance to closest barrier
acc = envcov[,"p_road"]) # road densityOn rassemble les covariables dont les valeurs diffèrent selon la cellule et l’année.
year <- matrix(rep(c('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23'),nrow(y)),
nrow = nrow(y),
ncol = 23,
byrow = TRUE)
head(year)## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [2,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [3,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [4,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [5,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [6,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
## [1,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
## [2,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
## [3,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
## [4,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
## [5,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
## [6,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
trendyear <- matrix(rep(1:23,nrow(y)),
nrow=nrow(y),
ncol=23,
byrow=TRUE)
head(trendyear)## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [2,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [3,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [4,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [5,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [6,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
## [1,] 15 16 17 18 19 20 21 22 23
## [2,] 15 16 17 18 19 20 21 22 23
## [3,] 15 16 17 18 19 20 21 22 23
## [4,] 15 16 17 18 19 20 21 22 23
## [5,] 15 16 17 18 19 20 21 22 23
## [6,] 15 16 17 18 19 20 21 22 23
Yearly site-level covariates:
yearly.site.covs <- list(
effort = effort, # effort
year = year, # year effect
trendyear = trendyear, # linear trend over time
SDAC = SDAC, # short distance
LDAC = LDAC) # long distanceOn rassemble les covariables dont les valeurs diffèrent selon les occasions secondaires.
occ <- array(0,c(3450,4,23))
for (i in 1:3450){
for (j in 1:23){
occ[i,1:4,j] <- c('1','2','3','4') # dec, jan, feb, mar
if (effort[i,j] == 0) occ[i,1:4,j] <- NA
}
}
occbis <- occ[,,1]
for (i in 2:23){
occbis <- cbind(occbis,occ[,,i])
}
dim(occbis) # 3450 x 92 ou 92 est 4 occ x 23 ans## [1] 3450 92
obs.covs <- list(OCC = occbis)Organise detection/non-detection data and covariates in
data.frame that can be used by unmarked:
umf <- unmarkedMultFrame(y = y,
siteCovs = sites.covs,
yearlySiteCovs = yearly.site.covs,
obsCovs = obs.covs,
numPrimary = 23)Summarize data:
summary(umf)## unmarkedFrame Object
##
## 3450 sites
## Maximum number of observations per site: 92
## Mean number of observations per site: 57.91
## Number of primary survey periods: 23
## Number of secondary survey periods: 4
## Sites with at least one detection: 547
##
## Tabulation of y observations:
## 0 1 2 <NA>
## 196086 3215 503 117596
##
## Site-level covariates:
## forest agr rock halt
## Min. :-1.218838 Min. :-1.50457 Min. :-0.253118 Min. :-0.172297
## 1st Qu.:-0.939161 1st Qu.:-0.83057 1st Qu.:-0.253118 1st Qu.:-0.172297
## Median :-0.102772 Median : 0.04001 Median :-0.253118 Median :-0.172297
## Mean : 0.003797 Mean :-0.01065 Mean : 0.006638 Mean : 0.004287
## 3rd Qu.: 0.754320 3rd Qu.: 0.82406 3rd Qu.:-0.253118 3rd Qu.:-0.172297
## Max. : 3.087256 Max. : 1.89463 Max. : 8.074589 Max. :11.368946
## alt dbarr acc
## Min. :-0.99359 Min. :-1.153377 Min. :-1.732584
## 1st Qu.:-0.61255 1st Qu.:-0.825046 1st Qu.:-0.682261
## Median :-0.33687 Median :-0.248552 Median : 0.329160
## Mean : 0.01199 Mean : 0.002765 Mean :-0.005365
## 3rd Qu.: 0.34066 3rd Qu.: 0.573661 3rd Qu.: 0.757069
## Max. : 4.41822 Max. : 3.898308 Max. : 1.846292
##
## Observation-level covariates:
## OCC
## 1 : 49951
## 2 : 49951
## 3 : 49951
## 4 : 49951
## NA's:117596
##
## Yearly-site-level covariates:
## effort year trendyear SDAC
## Min. :0.0000 01 : 3450 Min. : 1 Min. :0.000
## 1st Qu.:0.0000 02 : 3450 1st Qu.: 6 1st Qu.:0.000
## Median :0.1278 03 : 3450 Median :12 Median :0.000
## Mean :0.6243 04 : 3450 Mean :12 Mean :0.209
## 3rd Qu.:0.7983 05 : 3450 3rd Qu.:18 3rd Qu.:0.000
## Max. :8.0075 06 : 3450 Max. :23 Max. :8.000
## (Other):58650
## LDAC
## Min. :-1.0912
## 1st Qu.:-0.6831
## Median :-0.4912
## Mean : 0.0171
## 3rd Qu.: 0.5300
## Max. : 2.5113
##
Fit a model with all parameters constant,but detection which is a function of effort:
fm0 <- colext(
psiformula = ~ 1, # initial occupancy
gammaformula = ~ 1, # colonization
epsilonformula = ~ 1, # extinction
pformula = ~ effort, # detection
data = umf, # data
control = list(trace = 1)) # display number of iterations by blocks of 10## initial value 37465.552381
## iter 10 value 12706.122901
## iter 20 value 11195.999294
## iter 30 value 10963.405378
## final value 10963.302306
## converged
Inspect results:
fm0##
## Call:
## colext(psiformula = ~1, gammaformula = ~1, epsilonformula = ~1,
## pformula = ~effort, data = umf, control = list(trace = 1))
##
## Initial:
## Estimate SE z P(>|z|)
## -6.17 0.673 -9.16 5.25e-20
##
## Colonization:
## Estimate SE z P(>|z|)
## -4.01 0.0509 -78.6 0
##
## Extinction:
## Estimate SE z P(>|z|)
## -1.3 0.0653 -19.9 5.89e-88
##
## Detection:
## Estimate SE z P(>|z|)
## (Intercept) -2.236 0.0819 -27.3 3.70e-164
## effort 0.543 0.0276 19.7 1.37e-86
##
## AIC: 21936.6
Model coefficients (or parameters):
coef(fm0)## psi(Int) col(Int) ext(Int) p(Int) p(effort)
## -6.1658594 -4.0053675 -1.2988148 -2.2358314 0.5434858
Get parameter estimates on natural scale. Start with colonization:
backTransform(fm0, type = "col")## Backtransformed linear combination(s) of Colonization estimate(s)
##
## Estimate SE LinComb (Intercept)
## 0.0179 0.000895 -4.01 1
##
## Transformation: logistic
Then extinction:
backTransform(fm0, type = "ext")## Backtransformed linear combination(s) of Extinction estimate(s)
##
## Estimate SE LinComb (Intercept)
## 0.214 0.011 -1.3 1
##
## Transformation: logistic
And initial occupancy:
backTransform(fm0, type = "psi")## Backtransformed linear combination(s) of Initial estimate(s)
##
## Estimate SE LinComb (Intercept)
## 0.0021 0.00141 -6.17 1
##
## Transformation: logistic
Finally, we get detection as a function of effort:
# grid
effort_grid <- seq(min(yearly.site.covs$effort), max(yearly.site.covs$effort), length = 100)
# predict on logit scale
logit_det <- coef(fm0)[4] + coef(fm0)[5] * effort_grid
# backtransform
det <- plogis(logit_det)
# plot
plot(x = effort_grid,
y = det,
type = 'l',
xlab = "effort",
ylab = "estimated detection probability",
lwd= 3)Here we fit the model with all covariates as considered by Louvrier and colleagues in their paper:
fm <- colext(
psiformula = ~ 1, # initial occupancy
gammaformula = ~ forest + agr + halt + alt + SDAC + LDAC, # colonization
epsilonformula = ~ 1, # extinction
pformula = ~ effort + acc + OCC, # detection
data = umf, # data
control = list(trace = 1), # display number of iterations by blocks of 10
se = FALSE) # do not compute SE and confidence intervals to reduce computational burden## initial value 37465.552381
## iter 10 value 12721.165262
## iter 20 value 11671.497667
## iter 30 value 10755.918886
## iter 40 value 9850.393837
## iter 50 value 9803.647828
## final value 9799.071153
## converged
Inspect results (NA’s are produced because we did not compute SE’s when we fitted the model):
fm##
## Call:
## colext(psiformula = ~1, gammaformula = ~forest + agr + halt +
## alt + SDAC + LDAC, epsilonformula = ~1, pformula = ~effort +
## acc + OCC, data = umf, se = FALSE, control = list(trace = 1))
##
## Initial:
## Estimate SE z P(>|z|)
## -6.28 NA NA NA
##
## Colonization:
## Estimate SE z P(>|z|)
## (Intercept) -5.166 NA NA NA
## forest 0.929 NA NA NA
## agr 0.768 NA NA NA
## halt -0.144 NA NA NA
## alt 0.834 NA NA NA
## SDAC 0.576 NA NA NA
## LDAC 0.384 NA NA NA
##
## Extinction:
## Estimate SE z P(>|z|)
## -1.11 NA NA NA
##
## Detection:
## Estimate SE z P(>|z|)
## (Intercept) -2.497 NA NA NA
## effort 0.393 NA NA NA
## acc -0.842 NA NA NA
## OCC2 0.495 NA NA NA
## OCC3 0.436 NA NA NA
## OCC4 0.419 NA NA NA
##
## AIC: 19628.14
Model coefficients are:
coef(fm)## psi(Int) col(Int) col(forest) col(agr) col(halt) col(alt)
## -6.2803098 -5.1664074 0.9291481 0.7678356 -0.1435305 0.8342459
## col(SDAC) col(LDAC) ext(Int) p(Int) p(effort) p(acc)
## 0.5761154 0.3843558 -1.1123998 -2.4965811 0.3927426 -0.8415244
## p(OCC2) p(OCC3) p(OCC4)
## 0.4949260 0.4359181 0.4188788
Parameter estimates are given on the logit scale. You may compare them to those obtained by Louvrier and colleagues using a Bayesian approach.
There are two ways to map annual occupancy. We may compute the
occupancy probability \(\psi_{i,t}\),
or we rely directly on realized occupancy, that is the latent state
\(z_{i,t}\) which tells us whether cell
\(i\) is occupied in year \(t\) (\(z_{i,t} =
1\)) or not (\(z_{i,t} = 0\)).
We go for realized occupancy. To do so, we need the \(z\) estimates that we can get in
unmarked via empirical Bayes methods: The posterior
distribution of \(z\) is estimated with
data and the parameter estimates we obtained from fitting our model
above. The mode of the posterior distribution is the “empirical best
unbiased predictor (EBUP)”.
re <- ranef(fm) # estimate posterior distribution of z
z_mode <- bup(re, stat = "mode") # get mode of posterior distributionL’objet z_mode contient les \(z\) estimés avec en lignes les cellules de
la grille et en colonnes les années. Mettons tout ça sur une carte.
Get a map of France:
france <- st_read("shp/pays/Country.shp")## Reading layer `Country' from data source
## `/Users/oliviergimenez/Dropbox/OG/GITHUB/occupancy-workshop/tutorials/shp/pays/Country.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 54 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -2177943 ymin: 5251488 xmax: 4523874 ymax: 11388590
## Projected CRS: RGF93 v1 / Lambert-93
then our 10x10km grid:
grid_rect <- st_read("shp/grille10par10/grille_France.shp") %>%
st_transform(crs = st_crs(france))## Reading layer `grille_France' from data source
## `/Users/oliviergimenez/Dropbox/OG/GITHUB/occupancy-workshop/tutorials/shp/grille10par10/grille_France.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 5160 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 510000 ymin: 6110000 xmax: 1110000 ymax: 6970000
## Projected CRS: RGF93 v1 / Lambert-93
On récupère les coordonnées de nos cellules échantillonnées au moins une fois au cours de l’étude.
coord <- readRDS("data/coordcells_wolf.rds")
grid <- coord %>%
st_as_sf(coords = c('X','Y'), crs = st_crs(grid_rect))L’objet grid ainsi créé est un objet spatial
sf de type POINT, il me faudrait plutôt des
POLYGONS pour les représenter en couleur sur la grille.
grid_poly <- st_join(grid_rect, grid, join = st_covers)
grid_poly <- grid_poly[!is.na(grid_poly$id),]Enfin, une carte ! On prend l’année 2017 la plus récente dans le jeu de données.
grid_rect %>%
ggplot() +
geom_sf(alpha = 0, lwd = 0.01) +
geom_sf(data = grid_poly, aes(fill = as_factor(z_mode[,23])),
lwd = 0.01) +
geom_sf(data = france %>% filter(NAME == "France"), alpha = 0) +
scale_fill_manual(name = "",
values = c("gray90",
"steelblue1"),
labels = c("cellule non-occupée",
"cellule occupée")) +
labs(title = "Carte de l'occupancy du loup en 2017",
subtitle = "Données réseau loup")Construisons une fonction qui fait la carte de l’occupancy pour une année quelconque. Cette fonction prend comme argument year entre 1995 et 2017.
occ_plot <- function(year){
if (year < 1995) stop("Le loup venait juste d'arriver, pas la peine d'en faire une carte")
if (year > 2017) stop("Pas de données aussi récentes, va falloir attendre")
index <- year - 1994
grid_rect %>%
ggplot() +
geom_sf(alpha = 0, lwd = 0.01) +
geom_sf(data = grid_poly, aes(fill = as_factor(z_mode[,index])),
lwd = 0.01) +
geom_sf(data = france %>% filter(NAME == "France"), alpha = 0) +
scale_fill_manual(name = "",
values = c("gray90",
"steelblue1"),
labels = c("non-occupée",
"occupée")) +
labs(subtitle = year) +
theme(legend.position = "none")
}On fait un essai avec l’année 2016.
occ_plot(2016)On fait 3 cartes pour les années 2000, 2010 et 2017.
library(patchwork)
plot_list <- lapply(X = c(2000, 2010, 2017), FUN = occ_plot)
plot_list[[1]] | plot_list[[2]] | plot_list[[3]]